The goal of this document is to produce several interactive plots for ATAC data

# Import libraries
library(plotly)
library(heatmaply)
library(chromVAR)
library(motifmatchr)
library(BSgenome.Hsapiens.UCSC.hg19)
library(SummarizedExperiment)
library(BuenColors)
library(chromVARxx)
library(chromVARmotifs)

Initialize parallel processing

library(BiocParallel)
register(MulticoreParam(2)) # adjust according to your machine

Import/Filter data

peakfile <- "../data/peaks/heme_peaks.bed"
peaks <- getPeaks(peakfile)

bamfiles <- list.files("../data/bams", full.names = TRUE)
raw_counts <- getCounts(bamfiles, peaks, paired =  TRUE, by_rg = TRUE, format = "bam",
                              colData = DataFrame(source = bamfiles))
# discuss paired, by_rg, etc.
counts_filtered <- filterSamples(raw_counts, min_depth = 500,
                                  min_in_peaks = 0.15, shiny = FALSE)
counts <- filterPeaks(counts_filtered)

See effect of filtering

dim(raw_counts)
## [1] 590650     16
dim(counts_filtered)
## [1] 590650     12
dim(counts)
## [1] 15518    12

Get GC content/peak; get motifs from chromVARmotifs package; find kmers

counts <- addGCBias(counts, genome = BSgenome.Hsapiens.UCSC.hg19)
data("human_pwms_v1") # also mouse_pwms_v1
motif_ix <- matchMotifs(human_pwms_v1, counts, genome = BSgenome.Hsapiens.UCSC.hg19)
kmer_ix <- matchKmers(5, counts, genome = BSgenome.Hsapiens.UCSC.hg19)

dim(motif_ix)
## [1] 15518  1764
dim(kmer_ix)
## [1] 15518   512

Compute deviations; typically time consuming

dev <- computeDeviations(object = counts, annotations = motif_ix)

Find variable motifs

variabilityAll <- computeVariability(dev)
plotVariability(variabilityAll, use_plotly = TRUE)

Figure 1a; variability across motifs

Find variable bagged motifs

variabilityBagged <- computeVariability(bagged)
plotVariability(variabilityBagged, use_plotly = TRUE)

Figure 1b; variability across reasonably unique motifs

Examine variable motifs in sample

mostvariable <- tail(sort(variabilityAll$variability, index.return = TRUE)$ix,30)
m <- assay(dev)[mostvariable,]
heatmaply(m, colors = jdb_palette("flame_light"))

Figure 2a; motifs x samples

Examine variable bagged motifs in sample

mostvariable <- tail(sort(variabilityBagged$variability, index.return = TRUE)$ix,30)
m <- assay(bagged)[mostvariable,]
heatmaply(m, colors = jdb_palette("flame_light"))

Figure 2b; bagged motifs x samples